In [29]:
from pyfasta import Fasta
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
In [36]:
# Open the human GRCh38 cDNA FASTA with pyfasta. The resulting object is
# used below like a mapping: iterated via .keys() and indexed by name.
# Requires this file to be in the current working directory (the path is
# relative, so it is resolved against wherever the kernel was started).
# NOTE(review): the file name follows Ensembl's naming scheme -- confirm
# which Ensembl release it was downloaded from and record it here.
# NOTE(review): pyfasta is understood to write index/flat files next to the
# FASTA the first time it is opened -- confirm the directory is writable.
all_trans = Fasta("Homo_sapiens.GRCh38.cdna.all.fa")
In [14]:
# --- Selection parameters -------------------------------------------------
min_len = 500    # a transcript is kept only if it is strictly longer than this
max_trans = 10   # stop scanning once this many transcripts have been kept

# --- Pick the first `max_trans` transcripts that pass the length filter ----
# Maps the FASTA record name to its sequence as a plain string.
trans_to_keep = {}
count = 0        # number of transcripts accepted so far
for tname in all_trans.keys():
    # Quota reached: no need to look at the remaining records.
    if count == max_trans:
        break
    # Too short: skip without counting it towards the quota.
    if len(all_trans[tname]) <= min_len:
        continue
    trans_to_keep[tname] = str(all_trans[tname])
    count += 1
In [33]:
# Wrap every selected sequence in a Biopython SeqRecord so it can be handed
# to SeqIO. The record id is the first whitespace-delimited token of the
# original record name, and the description is deliberately left empty.
trans_recs = [
    SeqRecord(seq=Seq(sequence), id=header.split()[0], description="")
    for header, sequence in trans_to_keep.items()
]
In [34]:
# Encode the selection parameters in the output file name so the file itself
# records how it was produced: "<max_trans>_trans_gt_<min_len>_bp.fasta".
# With max_trans = 10 and min_len = 500 this is "10_trans_gt_500_bp.fasta".
# The name is relative, so the file lands in the current working directory.
out_fname = "{0}_trans_gt_{1}_bp.fasta".format(max_trans, min_len)
In [35]:
# Write all selected records to `out_fname` in FASTA format. Opening with
# mode "w" replaces any existing file of the same name, and the context
# manager closes the file once writing has finished.
with open(out_fname, "w") as fasta_out:
    SeqIO.write(trans_recs, fasta_out, "fasta")
In [ ]: